R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(ISLR)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(infer)
library(knitr)
library(broom)
library(dplyr)
library(ggplot2)
library(readr)
earthquakes <- read_csv("Downloads/earthquakes.csv")
## Rows: 1137 Columns: 43
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (24): id, type, title, url, detailUrl, alert, status, net, code, ids, s...
## dbl  (18): magnitude, time, updated, felt, cdi, mmi, tsunami, sig, nst, dmin...
## dttm  (1): date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
eq <- earthquakes[, -c(3,4,6,7,8,9,14,16,17,18,19,21,20,26,27,31,33,37:43)]
glimpse(eq) 
## Rows: 1,137
## Columns: 19
## $ id         <chr> "us7000necw", "tx2024shcj", "ci40734823", "tx2024scvz", "us…
## $ magnitude  <dbl> 4.80, 5.10, 3.70, 3.90, 4.10, 3.23, 3.43, 3.44, 4.69, 3.50,…
## $ date       <dttm> 2024-09-17 00:49:42, 2024-09-17 00:49:42, 2024-09-16 11:22…
## $ felt       <dbl> 1893, 2042, 1580, 5, 4, 472, 234, 225, 18592, 0, 5, 1, 2, 1…
## $ cdi        <dbl> 6, 6, 4, 3, 3, 4, 4, 4, 5, 0, 3, 3, 3, 3, 4, 5, 4, 4, 5, 3,…
## $ mmi        <dbl> 5, 5, 4, 4, 4, 3, 3, 3, 5, 3, 4, 4, 4, 4, 4, 5, 4, 4, 5, 2,…
## $ alert      <chr> "green", "green", NA, "green", "green", NA, NA, NA, "green"…
## $ tsunami    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0,…
## $ nst        <dbl> 37, 24, 135, 38, 28, 131, 111, 115, 145, 14, 32, 34, 45, 32…
## $ dmin       <dbl> 0.287000, 0.000000, 0.032940, 0.000000, 0.043000, 0.031370,…
## $ rms        <dbl> 0.4800, 0.1000, 0.2800, 0.1000, 0.2600, 0.2600, 0.2900, 0.3…
## $ gap        <dbl> 70.00, 38.00, 99.00, 65.00, 67.00, 88.00, 92.00, 84.00, 101…
## $ depth      <dbl> 4.0440, 6.1279, 10.9000, 6.2024, 8.8140, 10.9500, 11.0200, …
## $ latitude   <dbl> 32.3984, 32.4140, 34.0678, 31.6470, 31.6323, 34.0637, 34.05…
## $ longitude  <dbl> -102.044, -102.057, -118.807, -104.450, -104.473, -118.812,…
## $ distanceKM <dbl> 33, 34, 6, 58, 60, 6, 5, 7, 6, 7, 56, 61, 57, 61, 15, 4, 5,…
## $ location   <chr> "Ackerly, Texas", "Ackerly, Texas", "Malibu, CA", "Whites C…
## $ continent  <chr> "North America", "North America", "North America", "North A…
## $ country    <chr> "United States of America (the)", "United States of America…
ggplot(data = eq, aes(x = depth, y = magnitude)) + geom_point(color = 'sienna', alpha = 0.75, size = 2.5) + theme(panel.background = element_rect(fill = 'ghostwhite', color = 'black'), panel.grid.major = element_line(color = 'gray', linetype = 'dotted')) + labs(title = "Depth vs Magnitude", x = "Depth (km)", y = "Magnitude")

eq %>%
summarise(correlation = cor(magnitude, depth))
## # A tibble: 1 × 1
##   correlation
##         <dbl>
## 1       0.310
ggplot(data = eq, aes(x = depth, y = magnitude)) + geom_point(color = 'sienna', alpha = 0.75, size = 2.5) + theme(panel.background = element_rect(fill = 'ghostwhite', color = 'black'), panel.grid.major = element_line(color = 'gray', linetype = 'dotted')) + scale_x_log10() + labs(title = "Depth (log scale) vs Magnitude", x = "Depth (km)", y = "Magnitude")
## Warning in transformation$transform(x): NaNs produced
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_point()`).

eq %>%
  filter(depth > 0) %>%
  summarise(correlation = cor(magnitude, log10(depth)))
## # A tibble: 1 × 1
##   correlation
##         <dbl>
## 1       0.481
ggplot(data = eq, aes(x = felt, y = magnitude)) + geom_point(color = 'tan', alpha = 0.75, size = 2.5) + theme(panel.background = element_rect(fill = 'ghostwhite', color = 'black'), panel.grid.major = element_line(color = 'gray', linetype = 'dotted')) + labs(title = "Amount of People Who Felt Earthquake vs Magnitude", x = "People", y = "Magnitude")

eq %>%
summarise(correlation = cor(magnitude, felt))
## # A tibble: 1 × 1
##   correlation
##         <dbl>
## 1    -0.00757
ggplot(data = eq, aes(x = felt, y = magnitude)) + geom_point(color = 'tan', alpha = 0.75, size = 2.5) + theme(panel.background = element_rect(fill = 'ghostwhite', color = 'black'), panel.grid.major = element_line(color = 'gray', linetype = 'dotted'))  + scale_x_log10() + labs(title = "Amount of People Who Felt Earthquake (log scale) vs Magnitude", x = "People", y = "Magnitude")
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.

eq %>%
  filter(felt > 0) %>%
  summarise(correlation = cor(magnitude, log10(felt)))
## # A tibble: 1 × 1
##   correlation
##         <dbl>
## 1      0.0152
ggplot(data = eq, aes(x = latitude, y = magnitude)) + geom_point(color = 'royalblue2', alpha = 0.75, size = 2.5) + theme(panel.background = element_rect(fill = 'ghostwhite', color = 'black'), panel.grid.major = element_line(color = 'gray', linetype = 'dotted')) + labs(title = "Latitude vs Magnitude", x = "Latitude", y = "Magnitude")

eq %>%
summarise(correlation = cor(magnitude, latitude))
## # A tibble: 1 × 1
##   correlation
##         <dbl>
## 1      -0.411
ggplot(data = eq, aes(x = longitude, y = magnitude)) + geom_point(color = 'forestgreen', alpha = 0.75, size = 2.5) + theme(panel.background = element_rect(fill = 'ghostwhite', color = 'black'), panel.grid.major = element_line(color = 'gray', linetype = 'dotted')) + labs(title = "Longitude vs Magnitude", x = "Longitude", y = "Magnitude")

eq %>%
summarise(correlation = cor(magnitude, longitude))
## # A tibble: 1 × 1
##   correlation
##         <dbl>
## 1       0.747
ggplot(data = eq, aes(x = longitude, y = latitude)) + geom_point(color = 'seagreen3', alpha = 0.75, size = 2.5) + theme(panel.background = element_rect(fill = 'ghostwhite', color = 'black'), panel.grid.major = element_line(color = 'gray', linetype = 'dotted')) + labs(title = "Longitude vs Latitude", x = "Longitude", y = "Latitude") + xlim(c(-180,180)) + ylim(c(-90,90))

ggplot(data = eq, aes(x = longitude, y = latitude, color = magnitude)) + geom_point(alpha = 0.75, size = 1.5) + scale_color_gradient(low = "green", high = "red") + theme(panel.background = element_rect(fill = 'azure2', color = 'black'), panel.grid.major = element_line(color = 'gray', linetype = 'dotted')) + labs(title = "Longitude vs Latitude by Magnitude", x = "Longitude", y = "Latitude") + xlim(c(-180,180)) + ylim(c(-90,90))

eq_continents <- eq %>%
  filter(continent %in% c("North America", "South America","Europe", "Asia", "Africa", "Oceania", "Insular Oceania"))
  
  
ggplot(data = eq_continents, aes(x = continent, y = magnitude, fill = continent)) + theme(panel.background = element_rect(fill = 'ghostwhite', color = 'black'), panel.grid.major = element_line(color = 'gray', linetype = 'dotted')) + geom_boxplot() + labs(title = "Earthquake Magnitude by Continent", x = "Continent", y = "Magnitude")

eq$continent <- as.factor(eq$continent)

magnitude_model <- lm(magnitude ~ depth + felt + longitude + latitude + continent, data = eq)

tidy(magnitude_model)
## # A tibble: 12 × 5
##    term                        estimate  std.error statistic  p.value
##    <chr>                          <dbl>      <dbl>     <dbl>    <dbl>
##  1 (Intercept)               5.24       0.306        17.1    1.01e-56
##  2 depth                     0.00121    0.000305      3.96   7.97e- 5
##  3 felt                      0.00000990 0.00000274    3.62   3.16e- 4
##  4 longitude                 0.00338    0.000898      3.76   1.82e- 4
##  5 latitude                  0.00130    0.00174       0.746  4.56e- 1
##  6 continentAsia             0.00615    0.320         0.0192 9.85e- 1
##  7 continentEurope           0.188      0.370         0.510  6.10e- 1
##  8 continentInsluar Oceania -0.692      0.620        -1.12   2.64e- 1
##  9 continentInsular Oceania -1.11       0.622        -1.79   7.44e- 2
## 10 continentNorth America   -1.11       0.329        -3.37   7.98e- 4
## 11 continentOceania         -0.272      0.619        -0.440  6.60e- 1
## 12 continentSouth America    0.615      0.335         1.84   6.64e- 2
glance(magnitude_model) %>%
  select('r.squared', 'adj.r.squared')
## # A tibble: 1 × 2
##   r.squared adj.r.squared
##       <dbl>         <dbl>
## 1     0.766         0.763
best_magnitude_model <- step(object = magnitude_model, direction = "backward", trace = FALSE)
tidy(best_magnitude_model)
## # A tibble: 11 × 5
##    term                        estimate  std.error statistic  p.value
##    <chr>                          <dbl>      <dbl>     <dbl>    <dbl>
##  1 (Intercept)               5.25       0.306         17.2   5.33e-57
##  2 depth                     0.00121    0.000305       3.95  8.38e- 5
##  3 felt                      0.00000998 0.00000273     3.65  2.78e- 4
##  4 longitude                 0.00311    0.000822       3.78  1.68e- 4
##  5 continentAsia             0.0594     0.312          0.190 8.49e- 1
##  6 continentEurope           0.237      0.364          0.653 5.14e- 1
##  7 continentInsluar Oceania -0.706      0.619         -1.14  2.54e- 1
##  8 continentInsular Oceania -1.12       0.622         -1.80  7.15e- 2
##  9 continentNorth America   -1.10       0.328         -3.35  8.55e- 4
## 10 continentOceania         -0.292      0.618         -0.472 6.37e- 1
## 11 continentSouth America    0.558      0.326          1.71  8.71e- 2
best_magnitude_model %>%
  glance() %>%
  select(r.squared, adj.r.squared)
## # A tibble: 1 × 2
##   r.squared adj.r.squared
##       <dbl>         <dbl>
## 1     0.766         0.763
eq$tsunami <- as.factor(eq$tsunami)

ggplot(data = eq, aes(x = tsunami, y = magnitude, fill = tsunami)) + theme(panel.background = element_rect(fill = 'ghostwhite', color = 'black'), panel.grid.major = element_line(color = 'gray', linetype = 'dotted')) + geom_boxplot() + scale_fill_manual(values = c("0" = "lightblue", "1" = "brown3"), labels = c("No Tsunami Warning", "Tsunami Warning")) + labs(title = "Magnitude by Tsunami Warning", x = "Tsunami", y = "Magnitude")

eq %>%
summarise('No Tsunami Magnitude' = mean(magnitude[tsunami == 0]),
          'Tsunami Magnitude' = mean(magnitude[tsunami ==1]))
## # A tibble: 1 × 2
##   `No Tsunami Magnitude` `Tsunami Magnitude`
##                    <dbl>               <dbl>
## 1                   4.80                5.80
eq$alert <- as.factor(eq$alert)
eq_alert <- eq %>% 
  filter(!is.na(alert))
 
eq_alert %>%
  count(alert)
## # A tibble: 4 × 2
##   alert      n
##   <fct>  <int>
## 1 green    711
## 2 orange     6
## 3 red        9
## 4 yellow    38
ggplot(data = eq_alert, aes(x = alert, y = magnitude, fill = alert)) + theme(panel.background = element_rect(fill = 'ghostwhite', color = 'black'), panel.grid.major = element_line(color = 'gray', linetype = 'dotted')) + geom_boxplot() + scale_fill_manual(values = c('green', 'orange', 'red', 'yellow')) + labs(title = "Earthquake Magnitude by Alert Level", x = "Alert", y = "Magnitude")

eq_alert %>%
summarise('Green Alert Magnitude' = mean(magnitude[alert == "green"]),
          'Yellow Alert Magnitude' = mean(magnitude[alert == "yellow"]),
          'Orange Alert Magnitude' = mean(magnitude[alert == "orange"]),
          'Red Alert Magnitude' = mean(magnitude[alert == "red"]))
## # A tibble: 1 × 4
##   `Green Alert Magnitude` `Yellow Alert Magnitude` `Orange Alert Magnitude`
##                     <dbl>                    <dbl>                    <dbl>
## 1                    5.39                     6.27                      6.3
## # ℹ 1 more variable: `Red Alert Magnitude` <dbl>
ggplot(data = eq, aes(x = magnitude, y = mmi)) + geom_jitter(color = 'steelblue', alpha = 0.75, size = 2.5, height = 0.5) + theme(panel.background = element_rect(fill = 'ghostwhite', color = 'black'), panel.grid.major = element_line(color = 'gray', linetype = 'dotted')) + labs(title = "Magnitude vs MMI", x = "Magnitude (km)", y = "MMI") + geom_smooth(method = "lm", color = 'red')
## `geom_smooth()` using formula = 'y ~ x'

eq %>%
summarise(correlation = cor(magnitude, mmi))
## # A tibble: 1 × 1
##   correlation
##         <dbl>
## 1       0.445
ggplot(data = eq, aes(x = magnitude, y = rms)) + geom_jitter(color = 'steelblue', alpha = 0.75, size = 2.5) + theme(panel.background = element_rect(fill = 'ghostwhite', color = 'black'), panel.grid.major = element_line(color = 'gray', linetype = 'dotted')) + labs(title = "Magnitude vs RMS", x = "Magnitude", y = "RMS") + geom_smooth(method = "lm", color = 'red')
## `geom_smooth()` using formula = 'y ~ x'

eq %>%
summarise(correlation = cor(magnitude, rms))
## # A tibble: 1 × 1
##   correlation
##         <dbl>
## 1       0.600
updated_magnitude_model <- lm(magnitude ~ log10(depth) + felt + longitude + latitude + continent + mmi + rms + tsunami + alert, data = eq)
## Warning in eval(predvars, data, env): NaNs produced
tidy(updated_magnitude_model)
## # A tibble: 18 × 5
##    term                        estimate  std.error statistic  p.value
##    <chr>                          <dbl>      <dbl>     <dbl>    <dbl>
##  1 (Intercept)               3.52       0.243         14.5   2.66e-40
##  2 log10(depth)              0.532      0.0489        10.9   4.73e-25
##  3 felt                     -0.00000322 0.00000188    -1.71  8.71e- 2
##  4 longitude                 0.00541    0.000675       8.02  6.91e-15
##  5 latitude                  0.000974   0.00129        0.754 4.51e- 1
##  6 continentAsia            -0.346      0.213         -1.63  1.04e- 1
##  7 continentEurope           0.0698     0.247          0.282 7.78e- 1
##  8 continentInsluar Oceania -1.04       0.412         -2.53  1.17e- 2
##  9 continentInsular Oceania -1.41       0.413         -3.42  6.77e- 4
## 10 continentNorth America    0.0646     0.226          0.286 7.75e- 1
## 11 continentOceania         -0.818      0.412         -1.99  4.76e- 2
## 12 continentSouth America    0.665      0.224          2.97  3.12e- 3
## 13 mmi                       0.176      0.0172        10.2   2.02e-22
## 14 rms                       0.163      0.0697         2.34  1.98e- 2
## 15 tsunami1                  0.708      0.0670        10.6   6.94e-24
## 16 alertorange               0.710      0.152          4.68  3.68e- 6
## 17 alertred                  0.643      0.129          4.98  8.80e- 7
## 18 alertyellow               0.285      0.0700         4.07  5.42e- 5
glance(updated_magnitude_model) %>%
  select('r.squared', 'adj.r.squared')
## # A tibble: 1 × 2
##   r.squared adj.r.squared
##       <dbl>         <dbl>
## 1     0.791         0.785
final_magnitude_model <- step(object = updated_magnitude_model, direction = "forward", trace = FALSE)
tidy(final_magnitude_model)
## # A tibble: 18 × 5
##    term                        estimate  std.error statistic  p.value
##    <chr>                          <dbl>      <dbl>     <dbl>    <dbl>
##  1 (Intercept)               3.52       0.243         14.5   2.66e-40
##  2 log10(depth)              0.532      0.0489        10.9   4.73e-25
##  3 felt                     -0.00000322 0.00000188    -1.71  8.71e- 2
##  4 longitude                 0.00541    0.000675       8.02  6.91e-15
##  5 latitude                  0.000974   0.00129        0.754 4.51e- 1
##  6 continentAsia            -0.346      0.213         -1.63  1.04e- 1
##  7 continentEurope           0.0698     0.247          0.282 7.78e- 1
##  8 continentInsluar Oceania -1.04       0.412         -2.53  1.17e- 2
##  9 continentInsular Oceania -1.41       0.413         -3.42  6.77e- 4
## 10 continentNorth America    0.0646     0.226          0.286 7.75e- 1
## 11 continentOceania         -0.818      0.412         -1.99  4.76e- 2
## 12 continentSouth America    0.665      0.224          2.97  3.12e- 3
## 13 mmi                       0.176      0.0172        10.2   2.02e-22
## 14 rms                       0.163      0.0697         2.34  1.98e- 2
## 15 tsunami1                  0.708      0.0670        10.6   6.94e-24
## 16 alertorange               0.710      0.152          4.68  3.68e- 6
## 17 alertred                  0.643      0.129          4.98  8.80e- 7
## 18 alertyellow               0.285      0.0700         4.07  5.42e- 5
final_magnitude_model %>%
  glance() %>%
  select(r.squared, adj.r.squared)
## # A tibble: 1 × 2
##   r.squared adj.r.squared
##       <dbl>         <dbl>
## 1     0.791         0.785
manual_magnitude_model <- lm(magnitude ~ log10(depth) + felt + longitude + latitude + continent + mmi + rms + tsunami, data = eq)
## Warning in eval(predvars, data, env): NaNs produced
tidy(manual_magnitude_model)
## # A tibble: 15 × 5
##    term                         estimate  std.error statistic  p.value
##    <chr>                           <dbl>      <dbl>     <dbl>    <dbl>
##  1 (Intercept)               2.77        0.243        11.4    4.23e-28
##  2 log10(depth)              0.562       0.0415       13.6    4.96e-38
##  3 felt                     -0.000000478 0.00000200   -0.239  8.11e- 1
##  4 longitude                 0.00525     0.000645      8.14   1.36e-15
##  5 latitude                 -0.0000532   0.00124      -0.0430 9.66e- 1
##  6 continentAsia            -0.239       0.228        -1.05   2.94e- 1
##  7 continentEurope           0.0702      0.263         0.267  7.90e- 1
##  8 continentInsluar Oceania -1.08        0.440        -2.46   1.39e- 2
##  9 continentInsular Oceania -1.20        0.442        -2.72   6.66e- 3
## 10 continentNorth America   -0.134       0.237        -0.565  5.73e- 1
## 11 continentOceania         -0.965       0.440        -2.19   2.85e- 2
## 12 continentSouth America    0.756       0.237         3.19   1.46e- 3
## 13 mmi                       0.277       0.0137       20.3    5.31e-75
## 14 rms                       0.348       0.0578        6.01   2.72e- 9
## 15 tsunami1                  0.735       0.0634       11.6    6.36e-29
glance(manual_magnitude_model) %>%
  select('r.squared', 'adj.r.squared')
## # A tibble: 1 × 2
##   r.squared adj.r.squared
##       <dbl>         <dbl>
## 1     0.882         0.880
no_tsunami <- eq %>%
  filter(tsunami == 0)

yes_tsunami <- eq %>%
  filter(tsunami == 1)

no_avg_magnitude <- mean(no_tsunami$magnitude)
yes_avg_magnitude <- mean(yes_tsunami$magnitude)

null_dist_tsunami <- yes_tsunami %>% 
  specify(response = magnitude) %>%    #variable to work with
  hypothesize(null = "point", mu = no_avg_magnitude) %>%    #set null hypothesis
  generate(reps = 10000, type = "bootstrap") %>%    #generate samples
  calculate(stat = "mean")    #compute mean for each sample

null_dist_tsunami %>% 
ggplot(mapping = aes(x = stat)) + 
geom_histogram(fill = 'steelblue', color = "black", alpha = 0.75, bins = 30) + 
labs(x = "Total Magnitude", y = "Count", title = "Null Distribution of Magnitude for Tsunami Warnings", caption = "10000 bootstrap sample means") + geom_vline(aes(xintercept = yes_avg_magnitude), col = 'red', size = 1)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

null_dist_tsunami %>% 
  filter(stat >= yes_avg_magnitude) %>% 
  summarise(p_value = n() / nrow(null_dist_tsunami))
## # A tibble: 1 × 1
##   p_value
##     <dbl>
## 1       0
eq_north_america <- eq %>%
  filter(continent == "North America")

eq_south_america <- eq %>%
  filter(continent == "South America")

eq_africa <- eq %>%
  filter(continent == "Africa")

eq_europe <- eq %>%
  filter(continent == "Europe")

eq_asia <- eq %>%
  filter(continent == "Asia")

eq_oceania <- eq %>%
  filter(continent %in% c("Oceania", "Insular Oceania"))

na_avg_magnitude <- mean(eq_north_america$magnitude)
sa_avg_magnitude <- mean(eq_south_america$magnitude)
africa_avg_magnitude <- mean(eq_africa$magnitude)
europe_avg_magnitude <- mean(eq_europe$magnitude)
asia_avg_magnitude <- mean(eq_asia$magnitude)
oceania_avg_magnitude <- mean(eq_oceania$magnitude)
null_dist_na <- eq_south_america %>% 
  specify(response = magnitude) %>%
  hypothesize(null = "point", mu = na_avg_magnitude) %>%
  generate(reps = 10000, type = "bootstrap") %>%
  calculate(stat = "mean")

null_dist_africa <- eq_south_america %>% 
  specify(response = magnitude) %>%
  hypothesize(null = "point", mu = africa_avg_magnitude) %>%
  generate(reps = 10000, type = "bootstrap") %>%
  calculate(stat = "mean")

null_dist_europe <- eq_south_america %>% 
  specify(response = magnitude) %>%
  hypothesize(null = "point", mu = europe_avg_magnitude) %>%
  generate(reps = 10000, type = "bootstrap") %>%
  calculate(stat = "mean")

null_dist_asia <- eq_south_america %>% 
  specify(response = magnitude) %>%
  hypothesize(null = "point", mu = asia_avg_magnitude) %>%
  generate(reps = 10000, type = "bootstrap") %>%
  calculate(stat = "mean")

null_dist_oceania <- eq_south_america %>% 
  specify(response = magnitude) %>%
  hypothesize(null = "point", mu = oceania_avg_magnitude) %>%
  generate(reps = 10000, type = "bootstrap") %>%
  calculate(stat = "mean")
na_filter <- null_dist_na %>% 
  filter(stat >= sa_avg_magnitude)

africa_filter <- null_dist_africa %>% 
  filter(stat >= sa_avg_magnitude)

europe_filter <- null_dist_europe %>% 
  filter(stat >= sa_avg_magnitude)

asia_filter <- null_dist_asia %>% 
  filter(stat >= sa_avg_magnitude)

oceania_filter <- null_dist_oceania %>% 
  filter(stat >= sa_avg_magnitude)
  
p1 <- (p_value_na = nrow(na_filter) / nrow(null_dist_oceania))
p2 <- (p_value_na = nrow(africa_filter) / nrow(null_dist_oceania))
p3 <- (p_value_na = nrow(europe_filter) / nrow(null_dist_oceania))
p4 <- (p_value_na = nrow(asia_filter) / nrow(null_dist_oceania))
p5 <- (p_value_na = nrow(oceania_filter) / nrow(null_dist_oceania))


continent_p_values <- data.frame(
  continent = c("North America", "Africa","Europe", "Asia", "Oceania"),
  p_value = c(p1,p2,p3,p4,p5))

continent_p_values
##       continent p_value
## 1 North America  0.0000
## 2        Africa  0.0000
## 3        Europe  0.0000
## 4          Asia  0.0302
## 5       Oceania  0.0000